********************************************************************************
* Purpose: Sample selection + merge of all data sets, create and label variables and save final analysis data
*********************************************************************************

do "D:\Data\workdata\708296\Project - Mother groups and mental health\Do\global.do" 

*********************************************************************************
* sample selection documentation alongside the merge
*********************************************************************************

* Municipality lookup: reduce formaps to the municipality code and the
* treated variable, drop rows where treated is system missing, and store
* the result for the merge onto the birth sample.
use "$work\formaps.dta", clear
keep mfr_muni treated
drop if treated == .
save "$work\novaxmuni", replace


* Starting population: all births between 2012-2017
use "$work\mfr_sample", clear

* Attach treated by municipality; only births that find a match are kept.
merge m:1 mfr_muni using "$work\novaxmuni", keep(match) nogenerate

* NOVAX municipalities with good coverage years (contacts file):
* discard births whose birth year lies before the value of treated.
drop if mfr_yob < treated
drop treated

* Multiple births are removed here (not in the same groups).
keep if mfr_multiplebirth != 1 //219,239


* Only municipality-years that use NOVAX to register both groups and screenings.
* Both merges keep matched observations only.
merge m:1 mfr_yob mfr_muni using "$work\munics_grupper", keep(3) nogen

* nogen added: without it this merge left a _merge variable behind that was
* carried through every later step into the saved data sets, while all other
* merges in this file suppress it. keepusing() lists only the key variables,
* so this merge acts purely as a filter.
merge m:1 mfr_yob mfr_muni using "$work\munics_tested", keep(3) nogen keepusing(mfr_yob mfr_muni)

* Sample-size bookkeeping: number of observations (non-missing mfr_yob) at this step.
egen novaxmunigroups = count(mfr_yob)

* Individuals in a group: keep only births found in the wide group file.
merge 1:1 pnr using "$work\grupper_wide", keep(match) nogenerate // individuals in groups 78,449 - in paper we state 80,032

* Numeric group identifier built from GRUPPEID1, plus a running sample count.
egen group = group(GRUPPEID1)
egen ingroup = count(mfr_yob)

* 4: Drop unrealistically small groups: 76,747
* g_members = number of observations with non-missing mfr_yob in each group;
* groups with fewer than three such members are removed.
egen g_members = count(mfr_yob), by(group)
keep if g_members >= 3
egen ingroup1 = count(mfr_yob)


* EPDS: restrict to municipality-years present in munics_tested
* (only the key variables are read from the using file).
merge m:1 mfr_yob mfr_muni using "$work\munics_tested", keep(match) nogenerate keepusing(mfr_yob mfr_muni)

* Add efterfodsel_mor and newassignment by pnr; unmatched births stay in the data.
merge 1:1 pnr using "$work\efterfodsel_mor.dta", keep(master match) nogenerate
merge 1:1 pnr using "$work\newassignment", keep(master match) nogenerate

********************************************************************
* background and districts 
********************************************************************

* Background file: only births that match are retained.
merge 1:1 pnr using "$work\background", keep(match) nogenerate // all merged

* Districts: added where available, unmatched births are kept.
merge 1:1 pnr using "$work\districts.dta", keep(master match) nogenerate // districts


********************************************************************
* merge on other stuff
* outcomes
********************************************************************
* GP outcomes kids: system-missing values are set to zero after the merge.
merge 1:1 pnr using "$work\gp_honorar", keep(master match) nogenerate // gp expenses child
foreach spend of varlist sumhonorar* {
    replace `spend' = 0 if `spend' == .
}

* GP outcomes moms: same zero fill for the sumhonorarm variables.
merge 1:1 cprm mfr_dob using "$work\gpm_honorar", keep(master match) nogenerate // gp expenses moms
foreach spend of varlist sumhonorarm* {
    replace `spend' = 0 if `spend' == .
}

* NOVAX visits and nurse info: only shp_first is read from the using file.
merge 1:1 pnr using "$work\shp_first.dta", keep(master match) nogenerate keepusing(shp_first)

* Nurse visits/contacts are added here.
merge 1:1 pnr using "$work\novax_atspecificmonth", keep(master match) nogenerate

* contacts and psych tests...
* Every merge below keeps all births (master and matched). After each merge
* the listed variables are recoded in a single pass: system missing becomes 0,
* any value above zero becomes 1, everything else is left untouched.

merge m:1 cprm mfr_dob using "$work\conssssy_m", keep(master match) nogenerate
foreach flag of varlist gpm_cons* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\conssssy_monthly", keep(master match) nogenerate // monthly gp couns mom
foreach flag of varlist gpmw_cons* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\conssssy_monthly_pre", keep(master match) nogenerate // monthly gp couns mom
foreach flag of varlist gpmw_cons* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\psyksssy_m", keep(master match) nogenerate // yearly psyk mom also prebirth used in descriptives
foreach flag of varlist gpm_psyk* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\psyksssy_monthly", keep(master match) nogenerate // monthly psyk mom
foreach flag of varlist gpmw_psyk* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\psyksssy_monthly_pre", keep(master match) nogenerate // monthly psyk mom
foreach flag of varlist gpmw_psyk* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\outmoms_monthly", keep(master match) nogenerate // outpat outcomes
foreach flag of varlist out* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\inmoms_monthly", keep(master match) nogenerate // inpat outcomes
foreach flag of varlist inp* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\outmoms_monthly_pre", keep(master match) nogenerate // outpat outcomes
foreach flag of varlist out* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\inmoms_monthly_pre", keep(master match) nogenerate // inpat outcomes
foreach flag of varlist inp* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\psykkontaktmoms", keep(master match) nogenerate // prebirth psyk hosp
foreach flag of varlist postbirth* { // early
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

* TRACES survey: added by pnr, births without a survey record are kept.
* The path now uses the $work global like every other file in this script;
* the previous relative path (work\SPOR) resolved against the current working
* directory instead of the project work folder.
merge 1:1 pnr using "$work\SPOR", keep(1 3) nogen

* Dad mental health outcomes.
* After each merge the listed variables are recoded in one pass:
* system missing becomes 0, any value above zero becomes 1.
merge m:1 cprf mfr_dob using "$work\conssssy_f", keep(master match) nogenerate // fathers psyk outcomes 1 yearly
foreach flag of varlist gpf_cons1 gpf_cons2 {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprf mfr_dob using "$work\psyksssy_f", keep(master match) nogenerate // fathers psyk outcomes 2 yearly
foreach flag of varlist gpf_psyk1 gpf_psyk2 {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprf mfr_dob using "$work\psykkontaktdads", keep(master match) nogenerate // fathers psyk outcomes 3 yearly
foreach flag of varlist postbirthf* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

// Robust mental health outcomes (excl first 6mth after birth)
// Same one-pass recode after each merge: system missing -> 0, above zero -> 1.
merge m:1 cprm mfr_dob using "$work\psyksssy_robust", keep(master match) nogenerate // drop first 6m contacts
foreach flag of varlist gpm_psyk_robust {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\conssssy_robust", keep(master match) nogenerate // drop first 6m contacts
foreach flag of varlist gpm_cons_robust {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

merge m:1 cprm mfr_dob using "$work\psykkontaktmoms_robust", keep(master match) nogenerate // drop first 6m contacts
foreach flag of varlist postbirth* {
    replace `flag' = cond(`flag' == ., 0, cond(`flag' > 0, 1, `flag'))
}

// Fertility and cohabitation outcomes (added by pnr, unmatched births kept)
merge 1:1 pnr using "$work\fertility", keep(master match) nogenerate // fertility
merge 1:1 pnr using "$work\cohab", keep(master match) nogenerate // cohabition outcome

// Hospitalization outcomes - child and mom; system missing is set to zero
merge 1:1 pnr using "$work\hospitalizationskids", keep(master match) nogenerate // child hospitalization outcomes inpat
foreach hosp of varlist hospage* {
    replace `hosp' = 0 if `hosp' == .
}

merge m:1 id_mother mfr_dob using "$work\hospitalizationsmoms", keep(master match) nogenerate // same for mothers
foreach hosp of varlist hospagem* {
    replace `hosp' = 0 if `hosp' == .
}

// Labor market outcomes: mother files are keyed on id_mother and mfr_dob,
// father files on id_father and mfr_dob. Unmatched births are always kept.
merge m:1 id_mother mfr_dob using "$work\barsel", keep(master match) nogenerate
merge m:1 id_mother mfr_dob using "$work\work", keep(master match) nogenerate
merge m:1 id_mother mfr_dob using "$work\sick", keep(master match) nogenerate
merge m:1 id_father mfr_dob using "$work\barself", keep(master match) nogenerate
merge m:1 id_father mfr_dob using "$work\workf", keep(master match) nogenerate
merge m:1 id_father mfr_dob using "$work\sickf", keep(master match) nogenerate
merge m:1 id_mother mfr_dob using "$work\employment", keep(master match) nogenerate
merge m:1 id_father mfr_dob using "$work\employment_f", keep(master match) nogenerate
merge m:1 id_mother mfr_dob using "$work\UIbenefits", keep(master match) nogenerate
merge m:1 id_father mfr_dob using "$work\UIbenefitsf", keep(master match) nogenerate

// System-missing labor market values are set to zero.
foreach lm of varlist sick* work* barsel* empl* ui* {
    replace `lm' = 0 if `lm' == .
}

// Shrink storage types and store the raw merged file.
compress
save "$work\merged_raw.dta", replace


*****************************************************
* recodings
* Treatment variables etc.
*****************************************************
use "$work\merged_raw.dta", clear

* Numeric identifiers built from shp_first and GruppeSHPL.
egen nurse = group(shp_first)
egen nurse2 = group(GruppeSHPL)

// High EPDS score indicator: 1 when score_e1 is above 10, 0 otherwise
// (a system-missing score gives 0).
gen high = (score_e1 > 10) & (score_e1 != .)

// Screening indicator: 1 when score_e1 is not system missing.
gen tested = (score_e1 != .)

// Group-level averages for each covariate:
//   <var>_mean : mean over the OTHER members of the same group (leave-one-out),
//                computed by rangestat (community-contributed command) with
//                interval(group 0 0) and excludeself
//   <var>_avg  : mean over all members of the group, own observation included
// The (mean) statistic is spelled out so that the <var>_mean names used below
// do not depend on how rangestat handles an omitted statistic.
foreach var in mfr_cs mfr_paritet mfr_female mfr_homebirth mfr_first mfr_lowbw mfr_preterm inc_m inc_f grund_m grund_f higher_m higher_f uni_m uni_f dk_m dk_f married cohab alder_m alder_f high tested score_e1 {
rangestat (mean) `var', interval(group 0 0) excludeself
egen `var'_avg = mean(`var'), by(group)
}

// Treatment variable - at least one OTHER group member with a high EPDS score.
// Stata treats missing as larger than any number, so the unguarded test
// (high_mean > 0) coded observations with a missing leave-one-out mean as
// treated. They are now left missing instead of being assigned to either arm.
gen peer_treat = (high_mean > 0) if !missing(high_mean)

// Complement of the treatment indicator (missing where peer_treat is missing).
gen peer_pseduo = 1 - peer_treat

// Month of birth and a month-by-year-of-birth cell identifier.
gen mob = month(mfr_dob)
egen time = group(mob mfr_yob)

// Cumulative monthly contact indicators after birth.
// <stub>w# equals 1 once a contact has occurred in any month up to month #.
gen psykw1 = gpmw_psyk1 // first month
gen consw1 = gpmw_cons1 // first month
gen outw1 = out1 // first month 
gen inpw1 = inp1 // first month

foreach v in cons psyk { // code to accumulate
forvalues i = 2/24 {
local j = `i' - 1	
gen `v'w`i' = max(`v'w`j' , gpmw_`v'`i')
}
}
foreach v in inp out {
forvalues i = 2/24 {
local j = `i' - 1	
gen `v'w`i' = max(`v'w`j' ,`v'`i')
}
}
forvalues i = 1/24 {
gen adm`i' =  max(inpw`i',outw`i') // just a hospitalization
}

// Pre-birth hospital contact per month: inpatient OR outpatient, mirroring the
// post-birth adm# definition above. The earlier version took the max of the
// inpatient indicator with itself and therefore ignored outpatient contacts
// (this is the fix proposed in the former ATH review note).
// NOTE(review): assumes the out#_pre variables from outmoms_monthly_pre follow
// the same naming as the inp#_pre variables - confirm.
forvalues i = 1/12 {
gen adm`i'_pre = max(inp`i'_pre, out`i'_pre)
}
forvalues i = 1/24 {
gen anyw`i' = max(adm`i', consw`i', psykw`i') // any contact
}

// Any pre-birth contact per month, built like anyw# from hospital contact,
// psychologist/psychiatrist and GP consultation. It now uses adm#_pre so that
// outpatient contacts are included, consistent with the post-birth measure.
forvalues i = 1/12 {
gen any`i'_pre = max(adm`i'_pre, gpmw_psyk`i'_pre, gpmw_cons`i'_pre) // not accumulated for pre contacts
}
// Mothers: make the year-2 indicators cumulative (contact in year 1 or year 2).
replace gpm_cons2 = 1 if gpm_cons1 + gpm_cons2 > 0
replace gpm_psyk2 = 1 if gpm_psyk1 + gpm_psyk2 > 0

// Hospital contact by year 2: any of the four in/out, 1y/2y indicators equals 1.
gen postbirth2y = (postbirth2y_out == 1 | postbirth2y_in == 1 | postbirth1y_out == 1 | postbirth1y_in == 1)

// Any mental health contact by year 2 (GP, psychologist/psychiatrist or hospital).
gen any2 = (gpm_cons2 == 1 | gpm_psyk2 == 1 | postbirth2y == 1)

// Fathers: same construction.
replace gpf_cons2 = 1 if gpf_cons1 + gpf_cons2 > 0
replace gpf_psyk2 = 1 if gpf_psyk1 + gpf_psyk2 > 0
gen postbirthf2y = (postbirthf2y_out == 1 | postbirthf2y_in == 1 | postbirthf1y_out == 1 | postbirthf1y_in == 1)
gen any2f = (gpf_cons2 == 1 | gpf_psyk2 == 1 | postbirthf2y == 1)

// Robust version built from the three robust indicators.
gen any2_robust = (gpm_cons_robust == 1 | gpm_psyk_robust == 1 | postbirth2y_robust == 1)

// Difference between the group date and the date of birth.
gen age_m = date_gruppe - mfr_dob

// Blank out the placeholder string used for a missing district.
replace dis_ = "" if dis_ == "   ."

*****************************************************
* district FE 
*****************************************************
* Fixed-effect identifier: the district number from dis_ shifted by 1000,
* or the municipality code where no district number is available.
egen fe = group(dis_)
replace fe = cond(fe == ., mfr_muni, fe + 1000)

*****************************************************
* Labels
*****************************************************
* Variable labels used in tables. Labels wrapped in \specialcell{...} contain
* LaTeX line breaks for multi-line column headers.
* Changes relative to the previous version:
*   - newassignment: spelling of Multiple corrected
*   - postbirth2y_robust: previously carried the same text as any2_robust;
*     it now reads hospital contact, in line with adm24 and postbirthf2y

* Background characteristics
label var married "Married"
label var cohab "Cohabiting"
label var dk_f "Danish, father"
label var uni_f "Uni. degree, father"
label var higher_f "Higher educ, father"
label var grund_f "Prim. school, father"
label var inc_f "Income, father"
label var dk_m "Danish, mother"
label var uni_m "Uni. degree, mother"
label var higher_m "Higher educ, mother"
label var grund_m "Prim. school, mother"
label var inc_m "Income, mother"
label var mfr_preterm "Preterm birth"
label var mfr_first "First-time mothers" 
label var mfr_childnight "Hosp. nights at birth, child"
label var mfr_homebirth "Home birth"
label var mfr_lowbw "Low birth weight"
label var mfr_cs "C-section"
label var mfr_female "Child sex"
label var alder_m "Age, mother"
label var alder_f "Age, father"
label var mfr_paritet "Birth order"

* Pre-birth mental health and screening
label var gpmw_psyk1y_pre "Psychologist/psychiatrist 1y prior to birth"
label var gpmw_cons1y_pre "GP mental health consultation 1y prior to birth"
label var inp1y_pre "Inpatient mental health hosp. 1y prior to birth"
label var out1y_pre "Outpatient mental health hosp. 1y prior to birth"
label var high "High screening score"
label var score_e1 "Screening score"

* Father outcomes
label var any2f "\specialcell{Mental health \\ contact \\ by year 2}"
label var gpf_cons2 "\specialcell{GP mental health \\ consultation \\ by year 2}"
label var gpf_psyk2 "\specialcell{Psychologist \\ psychiatrist \\ by year 2}"
label var postbirthf2y "\specialcell{Mental health \\ hospital \\ contact \\ by year 2}" /*???*/

* Mother outcomes
label var postbirth2y "\specialcell{Mental health \\ hospitalization \\ by year 2}"
label var anyw24 "\specialcell{Mental health \\ contact \\ by year 2}"
label var consw24 "\specialcell{GP mental health \\ consultation \\ by year 2}"
label var psykw24 "\specialcell{Psychologist \\ psychiatrist \\ by year 2}"
label var adm24 "\specialcell{Mental health \\ hospital \\ contact \\ by year 2}"

* Nurse contacts and group assignment
label var unim12 "\specialcell{Universal Nurse Visits \\ Year 1}"
label var needm12 "\specialcell{Targeted Nurse Visits \\ Year 1}"
label var phonem12 "\specialcell{Nurse Phone Contacts \\ Year 1}"
label var newassignment "\specialcell{Multiple Mother Groups \\ Assigned}"

* GP expenses and hospitalizations (child, then mother)
label var sumhonorar0 "\specialcell{GP expenses \\ 6-12 months}"
label var sumhonorar1 "\specialcell{GP expenses \\ 2nd year}"
label var hospage0 "\specialcell{Hospitalization \\ 6-12 months}"
label var hospage1 "\specialcell{Hospitalization \\ 2nd year}"
label var sumhonorarm0 "\specialcell{GP expenses \\ 6-12 months}"
label var sumhonorarm1 "\specialcell{GP expenses \\ 2nd year}"
label var hospagem0 "\specialcell{Hospitalization \\ 6-12 months}"
label var hospagem1 "\specialcell{Hospitalization \\ 2nd year}"

* Fertility and cohabitation
label var fert2 "\specialcell{Gave birth \\ 2 years after}"
label var fert3 "\specialcell{Gave birth \\ 3 years after}"
label var fert5 "\specialcell{Gave birth \\ 5 years after}"
label var cohab2 "\specialcell{Cohabits \\ 2 years after}"
label var cohab3 "\specialcell{Cohabits \\ 3 years after}"
label var cohab5 "\specialcell{Cohabits \\ 5 years after}"

* Robust outcomes
label var any2_robust "\specialcell{Mental health \\ contact \\ by year 2}"
label var gpm_cons_robust "\specialcell{GP mental health \\ consultation \\ by year 2}"
label var gpm_psyk_robust "\specialcell{Psychologist \\ psychiatrist \\ by year 2}"
label var postbirth2y_robust "\specialcell{Mental health \\ hospital \\ contact \\ by year 2}"

* Treatment
label var peer_treat "Depressed Peer"

********************************************************************************
* Save datasets
********************************************************************************
* Full analysis file.
save "$work\final_data", replace

* Restricted file: observations with tested_avg of at least 1.
keep if tested_avg >= 1
save "$work\final_data1", replace

* Mother and father identifier lists for the restricted sample. Each list is
* written from a temporary copy so the restricted analysis data stays in memory.
preserve
keep id_mother mfr_dob 
save "$work\mothers1", replace
restore

preserve
keep id_father mfr_dob 
save "$work\fathers1", replace
restore

